package book.zone.test;

import book.zone.log.Logger;
import com.sun.org.apache.xerces.internal.impl.xs.identity.Selector;

import java.io.*;
import java.util.regex.*;
import java.util.List;
import java.util.ArrayList;


/**
 * Scratch utility that reads a saved HTML page from disk and reports every
 * line fragment that starts with a "Paperback" format span.
 *
 * Created by IntelliJ IDEA.
 * User: NDemeshchenko
 * Date: May 21, 2010
 * Time: 12:34:42 PM
 */
public class HttpParse {

    /** File scanned by the no-argument {@link #makeParse()}. */
    private static final String DEFAULT_HTML_FILE = "C:\\html.txt";

    /**
     * Matches from a Paperback format span up to the end of that line.
     * By default '.' does not match line terminators, so a single match
     * never extends past the line it starts on.
     */
    private static final Pattern PAPERBACK_SPAN =
            Pattern.compile("<span class=\"format\">Paperback.+");

    /**
     * Program entry point. Currently a no-op: {@link #makeParse()} is not
     * invoked from here.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared by the signature; nothing in the body throws it
     */
    public static void main(String[] args) throws IOException {
        // Nothing to run.
    }

    /**
     * Scans the default file ({@code C:\html.txt}); see {@link #makeParse(String)}.
     */
    void makeParse() {
        makeParse(DEFAULT_HTML_FILE);
    }

    /**
     * Reads the given file, prints the number of Paperback span matches to
     * standard output and passes each match to {@code Logger.out}.
     *
     * @param file path of the HTML file to scan
     */
    void makeParse(String file) {
        List<String> matches = findPaperbackSpans(getHtmlFromFile(file));

        System.out.println("lines found: " + matches.size());
        for (String match : matches) {
            Logger.out(match);
        }
    }

    /**
     * Collects every Paperback span match found in the given HTML text.
     *
     * @param htmlCode HTML text to search
     * @return the matches in order of occurrence; empty when there are none
     */
    private static List<String> findPaperbackSpans(String htmlCode) {
        List<String> matches = new ArrayList<String>();
        Matcher matcher = PAPERBACK_SPAN.matcher(htmlCode);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

    /**
     * Reads a text file line by line into a single string.
     *
     * <p>Each line read is followed by a {@code "\n"} in the result, so the
     * original line terminators are replaced and the result of a non-empty
     * file always ends with a newline. The file is decoded with the JVM's
     * default charset, because that is what {@link FileReader} uses.
     *
     * <p>Reading is best-effort: an {@link IOException} while opening,
     * reading or closing the file is printed via {@code printStackTrace()}
     * and whatever was read up to that point (possibly an empty string) is
     * returned. Callers cannot distinguish a failed read from a short file.
     *
     * @param fileName path of the file to read
     * @return the file content read so far; never {@code null}
     */
    String getHtmlFromFile(String fileName) {
        // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
        StringBuilder content = new StringBuilder();
        try {
            BufferedReader input = new BufferedReader(new FileReader(new File(fileName)));
            try {
                String line;
                while ((line = input.readLine()) != null) {
                    content.append(line).append("\n");
                }
            } catch (IOException e) {
                // Keep what was read so far; the reader is still closed below.
                e.printStackTrace();
            } finally {
                input.close();
            }
        } catch (IOException e) {
            // Raised when the file cannot be opened or when close() fails.
            e.printStackTrace();
        }

        return content.toString();
    }
}
